library("FRESA.CAD")
library(readxl)
library(igraph)
library(umap)
library(tsne)
library(entropy)
library(TH.data)
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)
About Dataset Coronaviruses are a broad family of viruses that have been linked to illnesses ranging from the common cold to more serious conditions such as Middle East Respiratory Syndrome (MERS) and Severe Acute Respiratory Syndrome (SARS) (SARS. In 2019, a new coronavirus (COVID-19) was discovered in Wuhan, China.
Sometimes, When PCR test resources are scarce and antigen test kits are inaccurate, clinicians look for alternate COVID-19 testing methods that can be completed in a day and handle thousands of samples. COVID-19 virus proteins should be lacking in normal people’s saliva. Some distinct proteins may be produced in response to COVID-19 infection and can be utilized as a signature to identify potentially infected people. Therefore, protein profiles in a patient’s saliva can indicate that he or she is infected with COVID-19.
Mass spectrometry is a method for determining the protein composition of a material. Saliva samples from hundreds of patients were studied in this dataset. So, a machine learning specialist is approached and charged with developing a machine learning model that can identify who COVID-19 infected since the PCR test cannot interpret the result completely.
https://www.kaggle.com/datasets/kerneler/saliva-testing-dataset?select=COVID-19_MS_dataset_train.csv
COVID_19_MS <- read.csv("~/GitHub/LatentBiomarkers/Data/COVID19_MS/COVID-19_MS_dataset_train.csv")
colnames(COVID_19_MS) <- str_replace_all(colnames(COVID_19_MS),"\\.","_")
COVID_19_MS$Person_ID <- NULL
sampleID <- unique(COVID_19_MS$Sample_ID)
spectraID <- colnames(COVID_19_MS)[str_detect(colnames(COVID_19_MS),"X")]
avgCOVID19 <- NULL
class <- COVID_19_MS[!duplicated(COVID_19_MS$Sample_ID),"PCR_result"]
for (id in sampleID)
{
avgCOVID19 <- rbind(avgCOVID19,apply(COVID_19_MS[COVID_19_MS$Sample_ID %in% id,spectraID],2,mean))
}
avgCOVID19 <- as.data.frame(avgCOVID19)
rownames(avgCOVID19) <- sampleID
avgCOVID19$class <- 1*(class=="pos")
pander::pander(table(avgCOVID19$class))
| 0 | 1 |
|---|---|
| 37 | 109 |
studyName <- "COVID19_MS"
dataframe <- avgCOVID19
outcome <- "class"
thro <- 0.80
TopVariables <- 10
cexheat = 0.25
Some libraries
library(psych)
library(whitening)
library("vioplot")
library("rpart")
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
| rows | col |
|---|---|
| 146 | 2715 |
pander::pander(table(dataframe[,outcome]))
| 0 | 1 |
|---|---|
| 37 | 109 |
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
largeSet <- length(varlist) > 1500
Scaling and removing near zero variance columns and highly co-linear(r>0.99999) columns
### Some global cleaning
sdiszero <- apply(dataframe,2,sd) > 1.0e-16
dataframe <- dataframe[,sdiszero]
varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
dataframe <- dataframe[,tokeep]
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]
iscontinous <- sapply(apply(dataframe,2,unique),length) >= 5 ## Only variables with enough samples
dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData
numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000
if (!largeSet)
{
hm <- heatMaps(data=dataframeScaled[1:numsub,],
Outcome=outcome,
Scale=TRUE,
hCluster = "row",
xlab="Feature",
ylab="Sample",
srtCol=45,
srtRow=45,
cexCol=cexheat,
cexRow=cexheat
)
par(op)
}
The heat map of the data
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
#cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
cormat <- cor(dataframe[,varlist],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Original Correlation",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
diag(cormat) <- 0
print(max(abs(cormat)))
}
DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#>
#> Included: 2328 , Uni p: 0.004316151 , Uncorrelated Base: 1297 , Outcome-Driven Size: 0 , Base Size: 1297
#>
#>
1 <R=0.994,r=0.972,N= 27>, Top: 11( 1 )[ 1 : 11 Fa= 11 : 0.972 ]( 11 , 14 , 0 ),<|>Tot Used: 25 , Added: 14 , Zero Std: 0 , Max Cor: 0.970
#>
2 <R=0.970,r=0.960,N= 27>, Top: 5( 1 )[ 1 : 5 Fa= 14 : 0.960 ]( 4 , 5 , 11 ),<|>Tot Used: 33 , Added: 5 , Zero Std: 0 , Max Cor: 0.959
#>
3 <R=0.959,r=0.955,N= 27>, Top: 3( 3 )[ 1 : 3 Fa= 15 : 0.955 ]( 3 , 6 , 14 ),<|>Tot Used: 37 , Added: 6 , Zero Std: 0 , Max Cor: 0.954
#>
4 <R=0.954,r=0.952,N= 27>, Top: 1( 1 )[ 1 : 1 Fa= 15 : 0.952 ]( 1 , 1 , 15 ),<|>Tot Used: 38 , Added: 1 , Zero Std: 0 , Max Cor: 0.951
#>
5 <R=0.951,r=0.925,N= 31>, Top: 14( 1 )[ 1 : 14 Fa= 23 : 0.925 ]( 14 , 16 , 15 ),<|>Tot Used: 59 , Added: 16 , Zero Std: 0 , Max Cor: 0.925
#>
6 <R=0.925,r=0.913,N= 31>, Top: 15( 1 )[ 1 : 15 Fa= 33 : 0.913 ]( 15 , 17 , 23 ),<|>Tot Used: 85 , Added: 17 , Zero Std: 0 , Max Cor: 0.912
#>
7 <R=0.912,r=0.906,N= 31>, Top: 6( 2 )[ 1 : 6 Fa= 36 : 0.906 ]( 6 , 8 , 33 ),<|>Tot Used: 96 , Added: 8 , Zero Std: 0 , Max Cor: 0.905
#>
8 <R=0.905,r=0.903,N= 31>, Top: 3( 1 )[ 1 : 3 Fa= 38 : 0.903 ]( 3 , 3 , 36 ),<|>Tot Used: 101 , Added: 3 , Zero Std: 0 , Max Cor: 0.902
#>
9 <R=0.902,r=0.851,N= 105>, Top: 38( 1 )[ 1 : 38 Fa= 65 : 0.851 ]( 38 , 57 , 38 ),<|>Tot Used: 175 , Added: 57 , Zero Std: 0 , Max Cor: 0.885
#>
10 <R=0.885,r=0.843,N= 105>, Top: 8( 1 )[ 1 : 8 Fa= 70 : 0.843 ]( 8 , 9 , 65 ),<|>Tot Used: 188 , Added: 9 , Zero Std: 0 , Max Cor: 0.842
#>
11 <R=0.842,r=0.800,N= 145>, Top: 53( 1 )[ 1 : 53 Fa= 102 : 0.800 ]( 53 , 72 , 70 ),<|>Tot Used: 283 , Added: 72 , Zero Std: 0 , Max Cor: 0.871
#>
12 <R=0.871,r=0.800,N= 145>, Top: 3( 1 )[ 1 : 3 Fa= 105 : 0.800 ]( 3 , 4 , 102 ),<|>Tot Used: 289 , Added: 4 , Zero Std: 0 , Max Cor: 0.800
#>
13 <R=0.800,r=0.800,N= 0>
#>
[ 13 ], 0.7981248 Decor Dimension: 289 Nused: 289 . Cor to Base: 148 , ABase: 30 , Outcome Base: 0
#>
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]
pander::pander(sum(apply(dataframe[,varlist],2,var)))
23332
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))
3247
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))
0.132
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))
0.324
if (!largeSet)
{
par(cex=0.6,cex.main=0.85,cex.axis=0.7)
UPSTM <- attr(DEdataframe,"UPSTM")
gplots::heatmap.2(1.0*(abs(UPSTM)>0),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Decorrelation matrix",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Beta|>0",
xlab="Output Feature", ylab="Input Feature")
par(op)
}
if (!largeSet)
{
cormat <- cor(DEdataframe[,varlistc],method="pearson")
cormat[is.na(cormat)] <- 0
gplots::heatmap.2(abs(cormat),
trace = "none",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "Correlation after IDeA",
cexRow = cexheat,
cexCol = cexheat,
srtCol=45,
srtRow=45,
key.title=NA,
key.xlab="|Pearson Correlation|",
xlab="Feature", ylab="Feature")
par(op)
diag(cormat) <- 0
print(max(abs(cormat)))
}
if (nrow(dataframe) < 1000)
{
classes <- unique(dataframe[1:numsub,outcome])
raincolors <- rainbow(length(classes))
names(raincolors) <- classes
datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}
if (nrow(dataframe) < 1000)
{
datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After IDeA",t='n')
text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}
univarRAW <- uniRankVar(varlist,
paste(outcome,"~1"),
outcome,
dataframe,
rankingTest="AUC")
100 : X297_73 200 : X382_77 300 : X479_57 400 : X629_81 500 :
X796_11
600 : X969_8 700 : X1173_74 800 : X1364_32 900 : X1599_26 1000 :
X1848_42
1100 : X2201_1 1200 : X2498_63 1300 : X2860_34 1400 : X3257_2 1500 :
X3669_6
1600 : X4208_08 1700 : X4836_61 1800 : X5379_65 1900 : X6237_78 2000 :
X7115_25
2100 : X8025_19 2200 : X9106_13 2300 : X10504_79 2400 : X11887_21 2500 :
X13358_42
2600 : X14760_46
univarDe <- uniRankVar(varlistc,
paste(outcome,"~1"),
outcome,
DEdataframe,
rankingTest="AUC",
)
100 : X297_73 200 : X382_77 300 : X479_57 400 : La_X629_81 500 :
X796_11
600 : La_X969_8 700 : X1173_74 800 : X1364_32 900 : X1599_26 1000 :
X1848_42
1100 : X2201_1 1200 : X2498_63 1300 : X2860_34 1400 : X3257_2 1500 :
X3669_6
1600 : X4208_08 1700 : X4836_61 1800 : X5379_65 1900 : La_X6237_78 2000
: X7115_25
2100 : X8025_19 2200 : X9106_13 2300 : X10504_79 2400 : X11887_21 2500 :
X13358_42
2600 : X14760_46
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")
##topfive
topvar <- c(1:length(varlist)) <= TopVariables
pander::pander(univarRAW$orderframe[topvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| X301_02 | 1.0971 | 1.288 | 5.780 | 4.428 | 0.37901 | 0.901 |
| X1714_44 | 0.9653 | 1.354 | 2.775 | 2.143 | 0.16574 | 0.876 |
| X1752_66 | 0.4318 | 0.300 | 1.092 | 0.570 | 0.52591 | 0.868 |
| X1539_08 | 0.4536 | 0.386 | 1.205 | 0.790 | 0.08336 | 0.866 |
| X900_35 | 0.7958 | 0.696 | 2.402 | 2.230 | 0.03660 | 0.841 |
| X440_87 | 1.8551 | 1.815 | 4.788 | 3.289 | 0.14184 | 0.839 |
| X1522_71 | 0.5368 | 0.438 | 1.262 | 0.787 | 0.54259 | 0.836 |
| X3325_02 | 39.9174 | 33.638 | 5.890 | 11.627 | 0.00130 | 0.833 |
| X3428_53 | 0.0818 | 0.313 | 0.566 | 1.047 | 0.00325 | 0.829 |
| X256_5 | 2.4015 | 2.773 | 8.994 | 9.942 | 0.00576 | 0.824 |
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]
theLaVar <- rownames(finalTable)[str_detect(rownames(finalTable),"La_")]
pander::pander(univarDe$orderframe[topLAvar,univariate_columns])
| caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | |
|---|---|---|---|---|---|---|
| X301_02 | 1.0971 | 1.2879 | 5.7798 | 4.4284 | 0.37901 | 0.901 |
| X1714_44 | 0.9653 | 1.3543 | 2.7750 | 2.1432 | 0.16574 | 0.876 |
| X1752_66 | 0.4318 | 0.3003 | 1.0915 | 0.5697 | 0.52591 | 0.868 |
| X1539_08 | 0.4536 | 0.3864 | 1.2050 | 0.7901 | 0.08336 | 0.866 |
| X900_35 | 0.7958 | 0.6958 | 2.4018 | 2.2300 | 0.03660 | 0.841 |
| X440_87 | 1.8551 | 1.8155 | 4.7878 | 3.2888 | 0.14184 | 0.839 |
| X1522_71 | 0.5368 | 0.4383 | 1.2620 | 0.7869 | 0.54259 | 0.836 |
| X3428_53 | 0.0818 | 0.3132 | 0.5656 | 1.0472 | 0.00325 | 0.829 |
| X256_5 | 2.4015 | 2.7730 | 8.9943 | 9.9421 | 0.00576 | 0.824 |
| X2870_03 | 0.6581 | 0.8780 | 1.5602 | 1.0686 | 0.75629 | 0.821 |
| La_X7409_15 | -0.0162 | 0.0816 | -0.0638 | 0.0568 | 0.56572 | 0.757 |
| La_X2962_99 | 0.2415 | 0.2010 | 0.0423 | 0.4892 | 0.05789 | 0.733 |
| La_X3325_02 | 4.0996 | 5.4444 | 1.3749 | 2.1974 | 0.12652 | 0.723 |
| La_X2472_01 | 0.3016 | 0.3318 | 0.7323 | 0.5305 | 0.47880 | 0.723 |
| La_X2671_39 | 0.5193 | 0.6291 | 0.8706 | 0.8453 | 0.02388 | 0.722 |
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")
theSigDc <- dc[theLaVar]
names(theSigDc) <- NULL
theSigDc <- unique(names(unlist(theSigDc)))
theFormulas <- dc[rownames(finalTable)]
deFromula <- character(length(theFormulas))
names(deFromula) <- rownames(finalTable)
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
| mean | total | fraction |
|---|---|---|
| 2.01 | 211 | 0.0795 |
allSigvars <- names(dc)
dx <- names(deFromula)[1]
for (dx in names(deFromula))
{
coef <- theFormulas[[dx]]
cname <- names(theFormulas[[dx]])
names(cname) <- cname
for (cf in names(coef))
{
if (cf != dx)
{
if (coef[cf]>0)
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("+ %5.3f*%s",coef[cf],cname[cf]))
}
else
{
deFromula[dx] <- paste(deFromula[dx],
sprintf("%5.3f*%s",coef[cf],cname[cf]))
}
}
}
}
finalTable <- rbind(finalTable,univarRAW$orderframe[theSigDc[!(theSigDc %in% rownames(finalTable))],univariate_columns])
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- deFromula[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]
Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
| DecorFormula | caseMean | caseStd | controlMean | controlStd | controlKSP | ROCAUC | RAWAUC | fscores | |
|---|---|---|---|---|---|---|---|---|---|
| X301_02 | 1.09708 | 1.2879 | 5.7798 | 4.4284 | 3.79e-01 | 0.901 | 0.901 | NA | |
| X1714_44 | 0.96530 | 1.3543 | 2.7750 | 2.1432 | 1.66e-01 | 0.876 | 0.876 | NA | |
| X1752_66 | 0.43183 | 0.3003 | 1.0915 | 0.5697 | 5.26e-01 | 0.868 | 0.868 | NA | |
| X1539_08 | 0.45356 | 0.3864 | 1.2050 | 0.7901 | 8.34e-02 | 0.866 | 0.866 | NA | |
| X900_35 | 0.79584 | 0.6958 | 2.4018 | 2.2300 | 3.66e-02 | 0.841 | 0.841 | NA | |
| X440_87 | 1.85509 | 1.8155 | 4.7878 | 3.2888 | 1.42e-01 | 0.839 | 0.839 | 1 | |
| X1522_71 | 0.53682 | 0.4383 | 1.2620 | 0.7869 | 5.43e-01 | 0.836 | 0.836 | NA | |
| X3325_02 | NA | 39.91739 | 33.6384 | 5.8901 | 11.6274 | 1.30e-03 | 0.833 | 0.833 | NA |
| X3428_53 | 0.08181 | 0.3132 | 0.5656 | 1.0472 | 3.25e-03 | 0.829 | 0.829 | NA | |
| X256_5 | 2.40151 | 2.7730 | 8.9943 | 9.9421 | 5.76e-03 | 0.824 | 0.824 | 4 | |
| X2870_03 | 0.65813 | 0.8780 | 1.5602 | 1.0686 | 7.56e-01 | 0.821 | 0.821 | NA | |
| X3283_6 | NA | 7.93877 | 7.5390 | 1.0008 | 2.1724 | 6.86e-04 | 0.812 | 0.812 | 5 |
| X3679_57 | NA | 0.16483 | 0.4102 | 0.3623 | 0.3880 | 4.48e-03 | 0.801 | 0.801 | 2 |
| La_X7409_15 | -0.293X3679_57 + 1.000X7409_15 | -0.01621 | 0.0816 | -0.0638 | 0.0568 | 5.66e-01 | 0.757 | 0.537 | -1 |
| La_X2962_99 | + 1.000X2962_99 -0.811X5722_3 | 0.24148 | 0.2010 | 0.0423 | 0.4892 | 5.79e-02 | 0.733 | 0.404 | 0 |
| La_X3325_02 | -4.512X3283_6 + 1.000X3325_02 | 4.09958 | 5.4444 | 1.3749 | 2.1974 | 1.27e-01 | 0.723 | 0.833 | -1 |
| La_X2472_01 | -6.670X606_79 + 1.000X2472_01 | 0.30158 | 0.3318 | 0.7323 | 0.5305 | 4.79e-01 | 0.723 | 0.696 | -1 |
| La_X2671_39 | -6.958X877_66 + 1.000X2671_39 | 0.51927 | 0.6291 | 0.8706 | 0.8453 | 2.39e-02 | 0.722 | 0.699 | -1 |
| X2671_39 | NA | 0.58852 | 0.5979 | 1.2166 | 2.2963 | 6.50e-05 | 0.699 | 0.699 | NA |
| X2472_01 | NA | 0.40507 | 0.2742 | 0.9290 | 1.2921 | 1.05e-02 | 0.696 | 0.696 | NA |
| X5722_3 | NA | 0.06519 | 0.0785 | 0.5365 | 1.1121 | 1.31e-03 | 0.662 | 0.662 | 3 |
| X7409_15 | NA | 0.03202 | 0.1558 | 0.0422 | 0.1033 | 2.84e-05 | 0.537 | 0.537 | NA |
| X877_66 | NA | 0.00995 | 0.0335 | 0.0497 | 0.3025 | 9.77e-10 | 0.460 | 0.460 | 2 |
| X606_79 | NA | 0.01552 | 0.0425 | 0.0295 | 0.1575 | 4.02e-09 | 0.448 | 0.448 | 2 |
| X2962_99 | NA | 0.29435 | 0.1837 | 0.4774 | 1.0962 | 1.23e-05 | 0.404 | 0.404 | NA |
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,scale. = TRUE) #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)
PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])
gplots::heatmap.2(abs(PCACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "PCA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000)
{
topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
if (topred < 2) topred <- 2
uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE) # EFA analysis
predEFA <- predict(uls,dataframeScaled[,iscontinous])
EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous])
EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
gplots::heatmap.2(abs(EFACor),
trace = "none",
# scale = "row",
mar = c(5,5),
col=rev(heat.colors(5)),
main = "EFA Correlation",
cexRow = 0.5,
cexCol = 0.5,
srtCol=45,
srtRow= -45,
key.title=NA,
key.xlab="Pearson Correlation",
xlab="Feature", ylab="Feature")
}
par(op)
par(xpd = TRUE)
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(rawmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
}
pander::pander(table(dataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 31 | 6 |
| 1 | 4 | 105 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.7603 | 0.6827 | 0.8270 |
| tp | 0.7466 | 0.6680 | 0.8149 |
| se | 0.9633 | 0.9087 | 0.9899 |
| sp | 0.8378 | 0.6799 | 0.9381 |
| diag.ac | 0.9315 | 0.8776 | 0.9667 |
| diag.or | 135.6250 | 35.9750 | 511.3030 |
| nndx | 1.2482 | 1.0776 | 1.6990 |
| youden | 0.8011 | 0.5886 | 0.9280 |
| pv.pos | 0.9459 | 0.8861 | 0.9799 |
| pv.neg | 0.8857 | 0.7326 | 0.9680 |
| lr.pos | 5.9404 | 2.8532 | 12.3678 |
| lr.neg | 0.0438 | 0.0166 | 0.1158 |
| p.rout | 0.2397 | 0.1730 | 0.3173 |
| p.rin | 0.7603 | 0.6827 | 0.8270 |
| p.tpdn | 0.1622 | 0.0619 | 0.3201 |
| p.tndp | 0.0367 | 0.0101 | 0.0913 |
| p.dntp | 0.0541 | 0.0201 | 0.1139 |
| p.dptn | 0.1143 | 0.0320 | 0.2674 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 105 | 6 | 111 |
| Test - | 4 | 31 | 35 |
| Total | 109 | 37 | 146 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.932 | 0.878 | 0.967 |
| 3 | se | 0.963 | 0.909 | 0.990 |
| 4 | sp | 0.838 | 0.680 | 0.938 |
| 6 | diag.or | 135.625 | 35.975 | 511.303 |
par(op)
par(xpd = TRUE)
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(IDeAmodel,main="IDeA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(IDeAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
}
pander::pander(table(DEdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 31 | 6 |
| 1 | 3 | 106 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.7671 | 0.69007 | 0.8330 |
| tp | 0.7466 | 0.66800 | 0.8149 |
| se | 0.9725 | 0.92167 | 0.9943 |
| sp | 0.8378 | 0.67986 | 0.9381 |
| diag.ac | 0.9384 | 0.88621 | 0.9714 |
| diag.or | 182.5556 | 43.14327 | 772.4619 |
| nndx | 1.2341 | 1.07254 | 1.6624 |
| youden | 0.8103 | 0.60153 | 0.9324 |
| pv.pos | 0.9464 | 0.88704 | 0.9801 |
| pv.neg | 0.9118 | 0.76322 | 0.9814 |
| lr.pos | 5.9969 | 2.88107 | 12.4826 |
| lr.neg | 0.0328 | 0.01067 | 0.1012 |
| p.rout | 0.2329 | 0.16698 | 0.3099 |
| p.rin | 0.7671 | 0.69007 | 0.8330 |
| p.tpdn | 0.1622 | 0.06193 | 0.3201 |
| p.tndp | 0.0275 | 0.00571 | 0.0783 |
| p.dntp | 0.0536 | 0.01991 | 0.1130 |
| p.dptn | 0.0882 | 0.01858 | 0.2368 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 106 | 6 | 112 |
| Test - | 3 | 31 | 34 |
| Total | 109 | 37 | 146 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.938 | 0.886 | 0.971 |
| 3 | se | 0.972 | 0.922 | 0.994 |
| 4 | sp | 0.838 | 0.680 | 0.938 |
| 6 | diag.or | 182.556 | 43.143 | 772.462 |
par(op)
par(xpd = TRUE)
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(PCAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}
pander::pander(table(PCAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 33 | 4 |
| 1 | 11 | 98 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.6986 | 0.6173 | 0.7717 |
| tp | 0.7466 | 0.6680 | 0.8149 |
| se | 0.8991 | 0.8266 | 0.9485 |
| sp | 0.8919 | 0.7458 | 0.9697 |
| diag.ac | 0.8973 | 0.8362 | 0.9413 |
| diag.or | 73.5000 | 21.9076 | 246.5926 |
| nndx | 1.2643 | 1.0890 | 1.7469 |
| youden | 0.7910 | 0.5724 | 0.9183 |
| pv.pos | 0.9608 | 0.9026 | 0.9892 |
| pv.neg | 0.7500 | 0.5966 | 0.8681 |
| lr.pos | 8.3165 | 3.2891 | 21.0284 |
| lr.neg | 0.1131 | 0.0639 | 0.2004 |
| p.rout | 0.3014 | 0.2283 | 0.3827 |
| p.rin | 0.6986 | 0.6173 | 0.7717 |
| p.tpdn | 0.1081 | 0.0303 | 0.2542 |
| p.tndp | 0.1009 | 0.0515 | 0.1734 |
| p.dntp | 0.0392 | 0.0108 | 0.0974 |
| p.dptn | 0.2500 | 0.1319 | 0.4034 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 98 | 4 | 102 |
| Test - | 11 | 33 | 44 |
| Total | 109 | 37 | 146 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.897 | 0.836 | 0.941 |
| 3 | se | 0.899 | 0.827 | 0.949 |
| 4 | sp | 0.892 | 0.746 | 0.970 |
| 6 | diag.or | 73.500 | 21.908 | 246.593 |
par(op)
EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(EFAmodel,EFAdataframe,type = "class")
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
text(EFAmodel, use.n = TRUE,cex=0.75)
ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
}
pander::pander(table(EFAdataframe[,outcome],pr))
| 0 | 1 | |
|---|---|---|
| 0 | 31 | 6 |
| 1 | 4 | 105 |
pander::pander(ptab)
detail:
| statistic | est | lower | upper |
|---|---|---|---|
| ap | 0.7603 | 0.6827 | 0.8270 |
| tp | 0.7466 | 0.6680 | 0.8149 |
| se | 0.9633 | 0.9087 | 0.9899 |
| sp | 0.8378 | 0.6799 | 0.9381 |
| diag.ac | 0.9315 | 0.8776 | 0.9667 |
| diag.or | 135.6250 | 35.9750 | 511.3030 |
| nndx | 1.2482 | 1.0776 | 1.6990 |
| youden | 0.8011 | 0.5886 | 0.9280 |
| pv.pos | 0.9459 | 0.8861 | 0.9799 |
| pv.neg | 0.8857 | 0.7326 | 0.9680 |
| lr.pos | 5.9404 | 2.8532 | 12.3678 |
| lr.neg | 0.0438 | 0.0166 | 0.1158 |
| p.rout | 0.2397 | 0.1730 | 0.3173 |
| p.rin | 0.7603 | 0.6827 | 0.8270 |
| p.tpdn | 0.1622 | 0.0619 | 0.3201 |
| p.tndp | 0.0367 | 0.0101 | 0.0913 |
| p.dntp | 0.0541 | 0.0201 | 0.1139 |
| p.dptn | 0.1143 | 0.0320 | 0.2674 |
tab:
| Outcome + | Outcome - | Total | |
|---|---|---|---|
| Test + | 105 | 6 | 111 |
| Test - | 4 | 31 | 35 |
| Total | 109 | 37 | 146 |
method: exact
digits: 2
conf.level: 0.95
pander::pander(ptab$detail[c(5,3,4,6),])
| statistic | est | lower | upper | |
|---|---|---|---|---|
| 5 | diag.ac | 0.932 | 0.878 | 0.967 |
| 3 | se | 0.963 | 0.909 | 0.990 |
| 4 | sp | 0.838 | 0.680 | 0.938 |
| 6 | diag.or | 135.625 | 35.975 | 511.303 |
par(op)